{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Code used to create tables in the PIC paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import libraries\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import pymysql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Connect to PIC\n",
    "con = pymysql.connect(host='localhost',\n",
    "                             user='root',\n",
    "                             password='13163232289zx',\n",
    "                             db='pmimic',\n",
    "                             cursorclass=pymysql.cursors.DictCursor)\n",
    "cur=con.cursor()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How many patients who had multiple hospital admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   count(obs)\n",
      "0         468\n"
     ]
    }
   ],
   "source": [
    "query = \\\n",
    "\"\"\"\n",
    "WITH chartobs AS (\n",
    "SELECT subject_id, count(subject_id) as obs\n",
    "FROM admissions\n",
    "GROUP BY subject_id)\n",
    "SELECT count(obs)\n",
    "FROM chartobs\n",
    "where obs>1;\n",
    "\"\"\"\n",
    "query_output = pd.read_sql_query(query,con)\n",
    "print(query_output.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "These are the details that we would like to include in Table 2, grouped by first careunit:\n",
    "  Patients, no. (% of total patients)\n",
    "  Hospital admissions, no. (% of total admissions)\n",
    "  Distinct ICU stays, no. (% of total unit stays)\n",
    "  Age, yrs, mean ± SD\n",
    "  Gender, male, percent of unit stays\n",
    "  ICU length of stay, median days (IQR)\n",
    "  Hospital length of stay, median days (IQR)\n",
    "  ICU mortality, percent of unit stays\n",
    "  Hospital mortality, percent of unit stays"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Extract and review the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   subject_id  hadm_id  icustay_id      hosp_admittime      hosp_dischtime  \\\n",
      "0           2   100183      200715 2081-08-18 09:31:33 2081-09-28 10:06:00   \n",
      "1           3   100490      201022 2108-06-27 17:07:48 2108-07-04 10:17:00   \n",
      "2           4   102191      202747 2081-09-25 10:28:15 2081-10-08 10:46:00   \n",
      "3           5   100548      201080 2075-08-19 15:26:27 2075-08-31 11:21:00   \n",
      "4           6   100199      200731 2086-08-31 14:17:23 2086-09-11 13:39:00   \n",
      "\n",
      "  first_careunit                 dob                 dod          icu_intime  \\\n",
      "0    General ICU 2068-11-30 00:00:00 2081-09-28 10:06:00 2081-08-18 09:31:32   \n",
      "1    General ICU 2100-02-18 00:00:00                 NaT 2108-06-27 17:07:48   \n",
      "2    General ICU 2069-11-08 10:30:00                 NaT 2081-09-25 21:29:52   \n",
      "3    General ICU 2059-10-08 00:00:00 2075-08-31 11:21:00 2075-08-19 15:26:26   \n",
      "4    General ICU 2079-05-13 00:00:00                 NaT 2086-08-31 14:17:22   \n",
      "\n",
      "          icu_outtime  icu_los  hosp_los gender  age_hosp_in  age_icu_in  \\\n",
      "0 2081-09-28 10:06:00  41.0239   41.0239      M      12.7244     12.7244   \n",
      "1 2108-07-04 10:17:00   6.7147    6.7147      M       8.3609      8.3609   \n",
      "2 2081-09-26 10:00:56   0.5216   13.0123      F      11.8877     11.8889   \n",
      "3 2075-08-31 11:21:00  11.8296   11.8295      M      15.8757     15.8757   \n",
      "4 2086-09-11 13:39:00  10.9734   10.9733      F       7.3085      7.3085   \n",
      "\n",
      "   hospital_expire_flag  icu_expire_flag  \n",
      "0                     1                1  \n",
      "1                     0                0  \n",
      "2                     0                0  \n",
      "3                     1                1  \n",
      "4                     0                0  \n"
     ]
    }
   ],
   "source": [
    "# Join admissions, icustays, and patients tables\n",
    "\n",
    "query = \\\n",
    "\"\"\"\n",
    "WITH population as (\n",
    "SELECT a.subject_id, a.hadm_id, i.icustay_id, \n",
    "    a.admittime as hosp_admittime, a.dischtime as hosp_dischtime, \n",
    "    i.first_careunit, \n",
    "    p.dob, p.dod, i.intime as icu_intime, i.outtime as icu_outtime, \n",
    "    round(timestampdiff(SECOND,i.intime,i.outtime)/60/60/24,4) as icu_los,\n",
    "    round(timestampdiff(SECOND,a.admittime,a.dischtime)/60/60/24,4)   as hosp_los, \n",
    "    p.gender, \n",
    "    round(timestampdiff(SECOND,p.dob,a.admittime)/60/60/24/365,4) as age_hosp_in,\n",
    "    round(timestampdiff(SECOND,p.dob,i.intime)/60/60/24/365,4) as age_icu_in,\n",
    "    hospital_expire_flag,\n",
    "    CASE WHEN p.dod IS NOT NULL \n",
    "        AND p.dod >= i.intime\n",
    "        AND p.dod <= i.outtime THEN 1 \n",
    "        ELSE 0 END AS icu_expire_flag\n",
    "FROM admissions a\n",
    "INNER JOIN icustays i\n",
    "ON a.hadm_id = i.hadm_id\n",
    "INNER JOIN patients p\n",
    "ON a.subject_id = p.subject_id\n",
    "ORDER BY a.subject_id, i.intime\n",
    ")\n",
    "SELECT *\n",
    "FROM population;\n",
    "\"\"\"\n",
    "\n",
    "query_output = pd.read_sql_query(query,con)\n",
    "print(query_output.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Distinct patients, no. (% of total patients)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Total patients: 12881\n",
      "\n",
      "Number of patients by first careunit:\n",
      "\n",
      "first_careunit  CICU  General ICU  NICU  PICU  SICU\n",
      "subject_id      2583         2642  3137  1953  2566\n",
      "\n",
      "Proportion of total hospital admissions:\n",
      "\n",
      "first_careunit       CICU  General ICU       NICU       PICU       SICU\n",
      "subject_id      20.052791     20.51083  24.353699  15.161866  19.920814\n"
     ]
    }
   ],
   "source": [
    "print('\\nTotal patients: {}'\\\n",
    "    .format(len(query_output.subject_id.unique())))\n",
    "\n",
    "print('\\nNumber of patients by first careunit:\\n')\n",
    "print(query_output[['first_careunit','subject_id']] \\\n",
    "                    .drop_duplicates(['subject_id']) \\\n",
    "                    .groupby('first_careunit').count().T)\n",
    "    \n",
    "print('\\nProportion of total hospital admissions:\\n')\n",
    "print(query_output[['first_careunit','subject_id']] \\\n",
    "                    .drop_duplicates(['subject_id']) \\\n",
    "                    .groupby('first_careunit') \\\n",
    "                    .count().T/len(query_output.subject_id.unique())*100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hospital admissions, no. (% of total admissions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Total hospital admissions: 13449\n",
      "\n",
      "Number of hospital admissions by first careunit:\n",
      "\n",
      "first_careunit  CICU  General ICU  NICU  PICU  SICU\n",
      "hadm_id         2638         2725  3205  2084  2797\n",
      "\n",
      "Proportion of total hospital admissions:\n",
      "\n",
      "first_careunit       CICU  General ICU       NICU       PICU       SICU\n",
      "hadm_id         19.614841    20.261729  23.830768  15.495576  20.797085\n"
     ]
    }
   ],
   "source": [
    "print('\\nTotal hospital admissions: {}'\\\n",
    "    .format(len(query_output.hadm_id.unique())))\n",
    "\n",
    "print('\\nNumber of hospital admissions by first careunit:\\n')\n",
    "print(query_output[['first_careunit','hadm_id']] \\\n",
    "                    .drop_duplicates(['hadm_id']) \\\n",
    "                    .groupby('first_careunit').count().T)\n",
    "    \n",
    "print('\\nProportion of total hospital admissions:\\n')\n",
    "print(query_output[['first_careunit','hadm_id']] \\\n",
    "                    .drop_duplicates(['hadm_id']) \\\n",
    "                    .groupby('first_careunit') \\\n",
    "                    .count().T/len(query_output.hadm_id.unique())*100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Distinct ICU stays, no. (% of total unit stays)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Total ICU stays: 13941\n",
      "\n",
      "Number of ICU stays by careunit:\n",
      "\n",
      "first_careunit  CICU  General ICU  NICU  PICU  SICU\n",
      "icustay_id      2803         2788  3282  2166  2902\n",
      "\n",
      "Proportion of total ICU stays:\n",
      "\n",
      "first_careunit       CICU  General ICU      NICU       PICU       SICU\n",
      "icustay_id      20.106162    19.998565  23.54207  15.536906  20.816297\n"
     ]
    }
   ],
   "source": [
    "print('\\nTotal ICU stays: {}'\\\n",
    "    .format(len(query_output.icustay_id.unique())))\n",
    "\n",
    "print('\\nNumber of ICU stays by careunit:\\n')\n",
    "print(query_output[['first_careunit','icustay_id']] \\\n",
    "          .groupby('first_careunit').count().T)\n",
    "\n",
    "print('\\nProportion of total ICU stays:\\n')\n",
    "print(query_output[['first_careunit','icustay_id']] \\\n",
    "          .groupby('first_careunit') \\\n",
    "          .count().T/len(query_output.icustay_id.unique())*100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Age, years, mean (IQR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean age, years: 2.480802532099554 \n",
      "Lower quartile age, years: 0.1194 \n",
      "Upper quartile age, years: 3.3397 \n",
      " \n",
      "Mean age by careunit, years:\n",
      " \n",
      "first_careunit      CICU  General ICU      NICU      PICU      SICU\n",
      "age_icu_in      2.462498     3.775664  0.040844  3.622367  3.161902\n",
      "\n",
      "Lower quartile by careunit, years:\n",
      " \n",
      "first_careunit     CICU  General ICU   NICU    PICU     SICU\n",
      "0.25                                                        \n",
      "age_icu_in      0.45265      0.41235  0.002  0.5123  0.37945\n",
      "\n",
      "Upper quartile by careunit, years:\n",
      " \n",
      "first_careunit     CICU  General ICU      NICU      PICU      SICU\n",
      "0.75                                                              \n",
      "age_icu_in      3.13975         6.27  0.049475  5.704025  4.869075\n"
     ]
    }
   ],
   "source": [
    "print('Mean age, years: {} '.format(query_output.age_icu_in.mean()))\n",
    "print('Lower quartile age, years: {} '.format(query_output.age_icu_in.quantile(0.25)))\n",
    "print('Upper quartile age, years: {} \\n '.format(query_output.age_icu_in.quantile(0.75)))\n",
    "\n",
    "print('Mean age by careunit, years:\\n ')\n",
    "print(query_output[['first_careunit','age_icu_in']] \\\n",
    "      .groupby('first_careunit').mean().T)\n",
    "\n",
    "print('\\nLower quartile by careunit, years:\\n ')\n",
    "print(query_output[['first_careunit','age_icu_in']] \\\n",
    "      .groupby('first_careunit').quantile(0.25).T)\n",
    "\n",
    "print('\\nUpper quartile by careunit, years:\\n ')\n",
    "print(query_output[['first_careunit','age_icu_in']] \\\n",
    "      .groupby('first_careunit').quantile(0.75).T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Gender, male, percent of unit stays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Gender:\n",
      "\n",
      "gender\n",
      "F    5924\n",
      "M    8017\n",
      "Name: gender, dtype: int64\n",
      "gender\n",
      "F    42.493365\n",
      "M    57.506635\n",
      "Name: gender, dtype: float64\n",
      "Gender by careunit:\n",
      "\n",
      "first_careunit  gender\n",
      "CICU            F         1412\n",
      "                M         1391\n",
      "General ICU     F         1076\n",
      "                M         1712\n",
      "NICU            F         1298\n",
      "                M         1984\n",
      "PICU            F          916\n",
      "                M         1250\n",
      "SICU            F         1222\n",
      "                M         1680\n",
      "Name: gender, dtype: int64\n",
      "\n",
      "Proportion by unit:\n",
      "\n",
      "first_careunit  gender\n",
      "CICU            F         50.374599\n",
      "                M         49.625401\n",
      "General ICU     F         38.593974\n",
      "                M         61.406026\n",
      "NICU            F         39.549055\n",
      "                M         60.450945\n",
      "PICU            F         42.289935\n",
      "                M         57.710065\n",
      "SICU            F         42.108890\n",
      "                M         57.891110\n",
      "Name: gender, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print('Gender:\\n')\n",
    "print(query_output.groupby('gender').gender.count())\n",
    "print(query_output.groupby('gender').gender.count() \\\n",
    "     /query_output.gender.count()*100)\n",
    "\n",
    "print('Gender by careunit:\\n')\n",
    "print(query_output.groupby(['first_careunit','gender']).gender.count())\n",
    "\n",
    "print('\\nProportion by unit:\\n')\n",
    "print(query_output.groupby(['first_careunit','gender']) \\\n",
    "    .gender.count()/query_output.groupby(['first_careunit']).gender.count()*100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "ICU length of stay, mean days (IQR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean ICU length of stay, days: 9.30284736389066\n",
      "Lower quartile ICU length of stay, days: 0.9217\n",
      "Upper quartile ICU length of stay, days: 9.1691\n",
      "\n",
      "Mean length of ICU stay by careunit, days:\n",
      " \n",
      "first_careunit      CICU  General ICU       NICU      PICU      SICU\n",
      "icu_los         3.926365     7.292125  21.557103  9.710644  2.264398\n",
      "\n",
      "Lower quartile length of ICU stay, days:\n",
      " \n",
      "first_careunit    CICU  General ICU    NICU     PICU    SICU\n",
      "0.25                                                        \n",
      "icu_los         0.9172     0.886975  2.5477  1.98555  0.7767\n",
      "\n",
      "Upper quartile length of ICU stay, days:\n",
      " \n",
      "first_careunit    CICU  General ICU      NICU       PICU      SICU\n",
      "0.75                                                              \n",
      "icu_los         3.9487     8.911775  32.82785  11.136225  1.625525\n"
     ]
    }
   ],
   "source": [
    "print('Mean ICU length of stay, days: {}'.format(query_output.icu_los.mean()))\n",
    "print('Lower quartile ICU length of stay, days: {}' \\\n",
    "    .format(query_output.icu_los.quantile(0.25)))\n",
    "print('Upper quartile ICU length of stay, days: {}\\n' \\\n",
    "    .format(query_output.icu_los.quantile(0.75)))\n",
    "\n",
    "print('Mean length of ICU stay by careunit, days:\\n ')\n",
    "print(query_output[['first_careunit','icu_los']] \\\n",
    "      .groupby('first_careunit').mean().T)\n",
    "\n",
    "print('\\nLower quartile length of ICU stay, days:\\n ')\n",
    "print(query_output[['first_careunit','icu_los']] \\\n",
    "      .groupby('first_careunit').quantile(0.25).T)\n",
    "\n",
    "print('\\nUpper quartile length of ICU stay, days:\\n ')\n",
    "print(query_output[['first_careunit','icu_los']] \\\n",
    "      .groupby('first_careunit').quantile(0.75).T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hospital length of stay, mean days (IQR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean length of hospital stay, days: 17.589063320692958\n",
      "Lower quartile length of hospital stay, days: 7.0254\n",
      "Upper quartile length of hospital stay, days: 20.9715\n",
      "\n",
      "Mean length of hospital stay, days:\n",
      " \n",
      "first_careunit      CICU  General ICU       NICU       PICU       SICU\n",
      "hosp_los        16.56631    12.777065  27.078793  14.520918  14.653829\n",
      "\n",
      "Lower quartile length of hospital stay, days:\n",
      " \n",
      "first_careunit      CICU  General ICU    NICU    PICU   SICU\n",
      "0.25                                                        \n",
      "hosp_los        9.031225       3.8202  9.6134  4.5536  7.023\n",
      "\n",
      "Upper quartile length of hospital stay, days:\n",
      " \n",
      "first_careunit     CICU  General ICU    NICU      PICU     SICU\n",
      "0.75                                                           \n",
      "hosp_los        19.1531      16.2091  37.839  16.71495  18.7365\n"
     ]
    }
   ],
   "source": [
    "print('Mean length of hospital stay, days: {}' \\\n",
    "    .format(query_output.drop_duplicates(['hadm_id']).hosp_los.mean()))\n",
    "print('Lower quartile length of hospital stay, days: {}' \\\n",
    "    .format(query_output.drop_duplicates(['hadm_id']).hosp_los.quantile(0.25)))\n",
    "print('Upper quartile length of hospital stay, days: {}\\n' \\\n",
    "    .format(query_output.drop_duplicates(['hadm_id']).hosp_los.quantile(0.75)))\n",
    "\n",
    "print('Mean length of hospital stay, days:\\n ')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "      [['first_careunit','hosp_los']] \\\n",
    "      .groupby('first_careunit').mean().T)\n",
    "\n",
    "print('\\nLower quartile length of hospital stay, days:\\n ')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "      [['first_careunit','hosp_los']] \\\n",
    "      .groupby('first_careunit').quantile(0.25).T)\n",
    "\n",
    "print('\\nUpper quartile length of hospital stay, days:\\n ')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "      [['first_careunit','hosp_los']] \\\n",
    "      .groupby('first_careunit').quantile(0.75).T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "ICU mortality, percent of unit stays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ICU mortality, number:\n",
      "\n",
      "icu_expire_flag\n",
      "0    12986\n",
      "1      955\n",
      "Name: icu_expire_flag, dtype: int64\n",
      "\n",
      "ICU mortality, %:\n",
      "\n",
      "icu_expire_flag\n",
      "0    93.149702\n",
      "1     6.850298\n",
      "Name: icu_expire_flag, dtype: float64\n",
      "\n",
      "ICU mortality by careunit:\n",
      "\n",
      "first_careunit  icu_expire_flag\n",
      "CICU            0                  2755\n",
      "                1                    48\n",
      "General ICU     0                  2374\n",
      "                1                   414\n",
      "NICU            0                  3046\n",
      "                1                   236\n",
      "PICU            0                  1966\n",
      "                1                   200\n",
      "SICU            0                  2845\n",
      "                1                    57\n",
      "Name: icu_expire_flag, dtype: int64\n",
      "\n",
      "Proportion by unit:\n",
      "\n",
      "first_careunit  icu_expire_flag\n",
      "CICU            0                  98.287549\n",
      "                1                   1.712451\n",
      "General ICU     0                  85.150646\n",
      "                1                  14.849354\n",
      "NICU            0                  92.809263\n",
      "                1                   7.190737\n",
      "PICU            0                  90.766390\n",
      "                1                   9.233610\n",
      "SICU            0                  98.035837\n",
      "                1                   1.964163\n",
      "Name: icu_expire_flag, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print('ICU mortality, number:\\n')\n",
    "print(query_output \\\n",
    "    .groupby(['icu_expire_flag']) \\\n",
    "    .icu_expire_flag.count())\n",
    "\n",
    "print('\\nICU mortality, %:\\n')\n",
    "print(query_output.groupby(['icu_expire_flag']) \\\n",
    "    .icu_expire_flag.count() / query_output.icu_expire_flag.count()*100)\n",
    "\n",
    "print('\\nICU mortality by careunit:\\n')\n",
    "print(query_output \\\n",
    "    .groupby(['first_careunit','icu_expire_flag']) \\\n",
    "    .icu_expire_flag.count())\n",
    "\n",
    "print('\\nProportion by unit:\\n')\n",
    "print(query_output \\\n",
    "    .groupby(['first_careunit','icu_expire_flag']) \\\n",
    "    .icu_expire_flag.count()/query_output \\\n",
    "    .groupby(['first_careunit']).icu_expire_flag.count()*100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Hospital mortality, percent of hospital stays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hospital mortality, number:\n",
      "\n",
      "hospital_expire_flag\n",
      "0    12478\n",
      "1      971\n",
      "Name: hospital_expire_flag, dtype: int64\n",
      "\n",
      "Hospital mortality, %:\n",
      "\n",
      "hospital_expire_flag\n",
      "0    92.780132\n",
      "1     7.219868\n",
      "Name: hospital_expire_flag, dtype: float64\n",
      "\n",
      "Hospital mortality:\n",
      "\n",
      "first_careunit  hospital_expire_flag\n",
      "CICU            0                       2585\n",
      "                1                         53\n",
      "General ICU     0                       2308\n",
      "                1                        417\n",
      "NICU            0                       2966\n",
      "                1                        239\n",
      "PICU            0                       1879\n",
      "                1                        205\n",
      "SICU            0                       2740\n",
      "                1                         57\n",
      "Name: hospital_expire_flag, dtype: int64\n",
      "\n",
      "Proportion by unit:\n",
      "\n",
      "first_careunit  hospital_expire_flag\n",
      "CICU            0                       97.990902\n",
      "                1                        2.009098\n",
      "General ICU     0                       84.697248\n",
      "                1                       15.302752\n",
      "NICU            0                       92.542902\n",
      "                1                        7.457098\n",
      "PICU            0                       90.163148\n",
      "                1                        9.836852\n",
      "SICU            0                       97.962102\n",
      "                1                        2.037898\n",
      "Name: hospital_expire_flag, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "print('Hospital mortality, number:\\n')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "    .groupby(['hospital_expire_flag']) \\\n",
    "    .hospital_expire_flag.count())\n",
    "\n",
    "print('\\nHospital mortality, %:\\n')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "    .groupby(['hospital_expire_flag']) \\\n",
    "    .hospital_expire_flag.count() \\\n",
    "    / query_output.drop_duplicates(['hadm_id']).hospital_expire_flag.count()*100)\n",
    "\n",
    "print('\\nHospital mortality:\\n')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "    .groupby(['first_careunit','hospital_expire_flag']) \\\n",
    "    .hospital_expire_flag.count())\n",
    "\n",
    "print('\\nProportion by unit:\\n')\n",
    "print(query_output.drop_duplicates(['hadm_id']) \\\n",
    "    .groupby(['first_careunit','hospital_expire_flag']) \\\n",
    "    .hospital_expire_flag.count()/query_output.drop_duplicates(['hadm_id']) \\\n",
    "    .groupby(['first_careunit']).hospital_expire_flag.count()*100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Table 3: Distribution of primary diagnosis by care unit for patients in the PIC database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   subject_id  hadm_id  icustay_id first_careunit icd10_code_cn icd_first1  \\\n",
      "0           2   100183      200715    General ICU       Z94.801          z   \n",
      "1           3   100490      201022    General ICU       R40.201          r   \n",
      "2           4   102191      202747    General ICU       K56.200          k   \n",
      "3           5   100548      201080    General ICU       C95.901          c   \n",
      "4           6   100199      200731    General ICU        N19.02          n   \n",
      "\n",
      "  icd_first2 icd_first4  \n",
      "0          9          .  \n",
      "1          4          .  \n",
      "2          5          .  \n",
      "3          9          .  \n",
      "4          1          .  \n"
     ]
    }
   ],
   "source": [
    "query = \\\n",
    "\"\"\"\n",
    "\n",
    "SELECT a.subject_id,a.hadm_id,i.icustay_id,i.first_careunit,icd.icd10_code_cn,\n",
    "    lower(LEFT(icd.icd10_code_cn,1)) AS icd_first1, SUBSTRING(icd.icd10_code_cn,2,1) AS icd_first2,\n",
    "    SUBSTRING(icd.icd10_code_cn,4,1) AS icd_first4\n",
    "    FROM admissions a\n",
    "    INNER JOIN icustays i\n",
    "    ON a.hadm_id = i.hadm_id\n",
    "    INNER JOIN patients p\n",
    "    ON a.subject_id = p.subject_id\n",
    "    INNER JOIN diagnoses_icd icd\n",
    "    ON a.hadm_id=icd.hadm_id\n",
    "\"\"\"\n",
    "query_output = pd.read_sql_query(query,con)\n",
    "print(query_output.head())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "#process icd-10 codes category\n",
    "def icd_category(icd1,icd2,icd4):\n",
    "    icd2=int(icd2)\n",
    "    if icd1=='a' or icd1=='b':\n",
    "        icd_category='A00-B99'\n",
    "    if icd1=='c':\n",
    "        icd_category='C00-D48'\n",
    "    if icd1=='d' and icd2>=0 and icd2<=4:\n",
    "        icd_category='C00-D48'\n",
    "    if icd1=='d' and icd2>=5 and icd2<=8:\n",
    "        icd_category='D50-D89'\n",
    "    if icd1=='e':\n",
    "        icd_category='E00-E90'\n",
    "    if icd1=='f':\n",
    "        icd_category='F00-F90'\n",
    "    if icd1=='g':\n",
    "        icd_category='G00-G90'\n",
    "    if icd1=='h' and icd2>=0 and icd2<=5:\n",
    "        icd_category='H00-H59'\n",
    "    if icd1=='h' and icd2>=6 and icd2<=9:\n",
    "        icd_category='H60-H95'\n",
    "    if icd1=='i':\n",
    "        icd_category='I00-I99'\n",
    "    if icd1=='j':\n",
    "        icd_category='J00-J99'\n",
    "    if icd1=='k':\n",
    "        icd_category='K00-K93'\n",
    "    if icd1=='l':\n",
    "        icd_category='L00-L99'\n",
    "    if icd1=='m' and icd4=='.':\n",
    "        icd_category='M00-M99'\n",
    "    if icd1=='n':\n",
    "        icd_category='N00-N99'\n",
    "    if icd1=='o':\n",
    "        icd_category='O00-O99'\n",
    "    if icd1=='p':\n",
    "        icd_category='P00-P96'\n",
    "    if icd1=='q':\n",
    "        icd_category='Q00-Q99'\n",
    "    if icd1=='r':\n",
    "        icd_category='R00-R99'\n",
    "    if icd1=='s' or icd1=='t':\n",
    "        icd_category='S00-T98'\n",
    "    if icd1=='v' or icd1=='w' or icd1=='x' or icd1=='y':\n",
    "        icd_category='V01-Y98'\n",
    "    if icd1=='z':\n",
    "        icd_category='Z00-Z99'\n",
    "    if icd1=='m' and icd4!='.':\n",
    "        icd_category='M'\n",
    "    return icd_category\n",
    "    \n",
    "query_output['icd_category']=list(map(lambda icd1,icd2,icd4:icd_category(icd1,icd2,icd4),query_output['icd_first1'],query_output['icd_first2'],query_output['icd_first4']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Primary ICD-10 codes by careunit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Primary ICD diagnoses by ICU stay:\n",
      "\n",
      "           CICU  General ICU    NICU   PICU   SICU  Total\n",
      "A00-B99     4.0        117.0    44.0   91.0   12.0    268\n",
      "C00-D48    26.0        198.0     3.0   49.0  230.0    506\n",
      "D50-D89     0.0        167.0    11.0   95.0   10.0    283\n",
      "E00-E90     4.0         64.0    37.0   57.0   22.0    184\n",
      "F00-F90     0.0         58.0     1.0    2.0    2.0     63\n",
      "G00-G90     0.0        201.0    38.0  274.0  246.0    759\n",
      "H00-H59     0.0          0.0     9.0    2.0    8.0     19\n",
      "H60-H95     0.0          0.0     0.0    1.0    3.0      4\n",
      "I00-I99   314.0        110.0    78.0  153.0  110.0    765\n",
      "J00-J99   189.0        430.0   173.0  518.0   65.0   1375\n",
      "K00-K93    56.0        184.0   184.0  102.0  483.0   1009\n",
      "L00-L99     0.0          7.0     4.0    7.0   19.0     37\n",
      "M           5.0         31.0     7.0   27.0  150.0    220\n",
      "M00-M99     0.0         43.0    14.0   11.0    7.0     75\n",
      "N00-N99     0.0        388.0     8.0   13.0   12.0    421\n",
      "O00-O99     0.0          0.0     4.0    0.0    0.0      4\n",
      "P00-P96    34.0         12.0  1801.0   11.0   27.0   1885\n",
      "Q00-Q99  1962.0        199.0   463.0   74.0  699.0   3397\n",
      "R00-R99    30.0        256.0   307.0  327.0  352.0   1272\n",
      "S00-T98     1.0        159.0    16.0  220.0  272.0    668\n",
      "V01-Y98     1.0         17.0     1.0   39.0   32.0     90\n",
      "Z00-Z99    11.0          4.0     2.0   10.0   34.0     61\n",
      "\n",
      "Proportion by careunit:\n",
      "\n",
      "              CICU  General ICU       NICU       PICU       SICU      Total\n",
      "A00-B99   0.151688     4.423440   1.372855   4.368699   0.429338   2.005238\n",
      "C00-D48   0.985969     7.485822   0.093604   2.352376   8.228980   3.786008\n",
      "D50-D89   0.000000     6.313800   0.343214   4.560730   0.357782   2.117471\n",
      "E00-E90   0.151688     2.419660   1.154446   2.736438   0.787120   1.376730\n",
      "F00-F90   0.000000     2.192817   0.031201   0.096015   0.071556   0.471380\n",
      "G00-G90   0.000000     7.599244   1.185647  13.154105   8.801431   5.679012\n",
      "H00-H59   0.000000     0.000000   0.280811   0.096015   0.286225   0.142162\n",
      "H60-H95   0.000000     0.000000   0.000000   0.048008   0.107335   0.029929\n",
      "I00-I99  11.907471     4.158790   2.433697   7.345175   3.935599   5.723906\n",
      "J00-J99   7.167235    16.257089   5.397816  24.867979   2.325581  10.288066\n",
      "K00-K93   2.123625     6.956522   5.741030   4.896783  17.280859   7.549570\n",
      "L00-L99   0.000000     0.264650   0.124805   0.336054   0.679785   0.276842\n",
      "M         0.189609     1.172023   0.218409   1.296207   5.366726   1.646091\n",
      "M00-M99   0.000000     1.625709   0.436817   0.528084   0.250447   0.561167\n",
      "N00-N99   0.000000    14.669187   0.249610   0.624100   0.429338   3.150019\n",
      "O00-O99   0.000000     0.000000   0.124805   0.000000   0.000000   0.029929\n",
      "P00-P96   1.289344     0.453686  56.193448   0.528084   0.966011  14.104003\n",
      "Q00-Q99  74.402730     7.523629  14.446178   3.552568  25.008945  25.417134\n",
      "R00-R99   1.137656     9.678639   9.578783  15.698512  12.593918   9.517396\n",
      "S00-T98   0.037922     6.011342   0.499220  10.561690   9.731664   4.998129\n",
      "V01-Y98   0.037922     0.642722   0.031201   1.872300   1.144902   0.673401\n",
      "Z00-Z99   0.417141     0.151229   0.062402   0.480077   1.216458   0.456416\n"
     ]
    }
   ],
   "source": [
    "print('Primary ICD diagnoses by ICU stay:\\n')\n",
    "a=query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='CICU']['icd_category'].value_counts()\n",
    "b=query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='General ICU']['icd_category'].value_counts()\n",
    "c=query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='NICU']['icd_category'].value_counts()\n",
    "d=query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='PICU']['icd_category'].value_counts()\n",
    "e=query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='SICU']['icd_category'].value_counts()\n",
    "f=query_output.drop_duplicates(['hadm_id'])['icd_category'].value_counts()\n",
    "df_num=pd.concat([a,b,c,d,e,f],axis=1,sort=False)\n",
    "df_num.fillna(value=0, inplace=True)\n",
    "df_num.columns = ['CICU', 'General ICU','NICU','PICU','SICU','Total']\n",
    "# Append a totals row\n",
    "print(df_num.sort_index())\n",
    "\n",
    "print('\\nProportion by careunit:\\n')\n",
    "a=(a/query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='CICU']['icd_category'].count())*100\n",
    "b=(b/query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='General ICU']['icd_category'].count())*100\n",
    "c=(c/query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='NICU']['icd_category'].count())*100\n",
    "d=(d/query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='PICU']['icd_category'].count())*100\n",
    "e=(e/query_output.drop_duplicates(['hadm_id']).loc[query_output.first_careunit=='SICU']['icd_category'].count())*100\n",
    "f=(f/query_output.drop_duplicates(['hadm_id'])['icd_category'].count())*100\n",
    "df_percent=pd.concat([a,b,c,d,e,f],axis=1,sort=False)\n",
    "df_percent.fillna(value=0, inplace=True)\n",
    "df_percent.columns = ['CICU', 'General ICU','NICU','PICU','SICU','Total']\n",
    "# Append a totals row\n",
    "print(df_percent.sort_index())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Table 5 Amount of complete data grouped by table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CHARTEVENTS: 79.70852851513123\n",
      "DIAGNOSES_ICD: 99.37541824670978\n",
      "EMR_SYMPTOMS: 6.654769871365901\n",
      "INPUTEVENTS: 41.095992267083055\n",
      "LABEVENTS: 93.85084392891665\n",
      "MICROBIOLOGYEVENTS: 89.69440107071158\n",
      "OR_EXAM_REPORTS: 91.91017919547922\n",
      "OUTPUTEVENTS: 11.666294891813518\n",
      "PRESCRIPTIONS: 51.48338166406424\n",
      "SURGERY_VITAL_SIGNS: 47.71358465313406\n"
     ]
    }
   ],
   "source": [
    "query1 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from chartevents\n",
    "\"\"\"\n",
    "query_output1 = pd.read_sql_query(query1,con)\n",
    "print('CHARTEVENTS: {}' \\\n",
    "    .format((len(query_output1)/13449)*100))\n",
    "query2 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from diagnoses_icd\n",
    "\"\"\"\n",
    "query_output2 = pd.read_sql_query(query2,con)\n",
    "print('DIAGNOSES_ICD: {}' \\\n",
    "    .format((len(query_output2)/13449)*100))\n",
    "query3 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from emr_symptoms\n",
    "\"\"\"\n",
    "query_output3 = pd.read_sql_query(query3,con)\n",
    "print('EMR_SYMPTOMS: {}' \\\n",
    "    .format((len(query_output3)/13449)*100))\n",
    "query4 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from inputevents\n",
    "\"\"\"\n",
    "query_output4 = pd.read_sql_query(query4,con)\n",
    "print('INPUTEVENTS: {}' \\\n",
    "    .format((len(query_output4)/13449)*100))\n",
    "query5 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from labevents\n",
    "\"\"\"\n",
    "query_output5 = pd.read_sql_query(query5,con)\n",
    "print('LABEVENTS: {}' \\\n",
    "    .format((len(query_output5)/13449)*100))\n",
    "query6 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from microbiologyevents\n",
    "\"\"\"\n",
    "query_output6 = pd.read_sql_query(query6,con)\n",
    "print('MICROBIOLOGYEVENTS: {}' \\\n",
    "    .format((len(query_output6)/13449)*100))\n",
    "query7 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from or_exam_reports\n",
    "\"\"\"\n",
    "query_output7 = pd.read_sql_query(query7,con)\n",
    "print('OR_EXAM_REPORTS: {}' \\\n",
    "    .format((len(query_output7)/13449)*100))\n",
    "query8 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from outputevents\n",
    "\"\"\"\n",
    "query_output8 = pd.read_sql_query(query8,con)\n",
    "print('OUTPUTEVENTS: {}' \\\n",
    "    .format((len(query_output8)/13449)*100))\n",
    "query9 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from prescriptions\n",
    "\"\"\"\n",
    "query_output9 = pd.read_sql_query(query9,con)\n",
    "print('PRESCRIPTIONS: {}' \\\n",
    "    .format((len(query_output9)/13449)*100))\n",
    "query10 = \\\n",
    "\"\"\"\n",
    "SELECT distinct hadm_id from surgery_vital_signs\n",
    "\"\"\"\n",
    "query_output10 = pd.read_sql_query(query10,con)\n",
    "print('SURGERY_VITAL_SIGNS: {}' \\\n",
    "    .format((len(query_output10)/13449)*100))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
